#!/usr/bin/env python3

from datetime import timedelta, date, datetime
from csv import DictWriter

import gzip
import logging
import re
import urllib.request
import urllib.error


# Install the default root handler so that log records are actually emitted.
logging.basicConfig()
# Script-wide logger; set to DEBUG so every download and every CSV row written
# is reported.
logger = logging.getLogger("rebuild-debian-csv")
logger.setLevel(logging.DEBUG)


class Release:
    """Metadata parsed from the ``Release`` file of one archive distribution.

    Attributes:
        label: value of the ``Label`` field (e.g. ``"Debian"``) or ``None``.
        suite: value of the ``Suite`` field or ``None``.
        version: value of the ``Version`` field or ``None``.
        codename: value of the ``Codename`` field or ``None``.
        architectures: list of names from the ``Architectures`` field
            (empty when the field is missing or blank).
        sortkey: string made of the label followed by the version (or a
            pseudo-version for suites without one); used for ordering.
    """

    # Pseudo-versions for suites that carry no ``Version`` field, chosen so
    # that they sort after every numbered release of the same label.
    _SUITE_TO_VERSION = {
        "testing": "96",
        "unstable": "97",
        "experimental": "98",
    }

    def __init__(self, fileobj):
        """Parse ``Key: value`` lines from *fileobj*.

        Args:
            fileobj: iterable of lines, either ``bytes`` (decoded as UTF-8,
                undecodable bytes are replaced) or ``str``.
        """
        params = {}
        for line in fileobj:
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            # Lines starting with a space are continuation lines (checksum
            # lists etc.) and carry no top-level field.
            if line.startswith(" "):
                continue

            # partition() instead of split(): a field with an empty value
            # ("Key: \n") must not blow up on tuple unpacking.
            key, sep, value = line.partition(": ")
            if not sep:
                continue
            params[key.strip()] = value.strip()

        self.label = params.get("Label")
        self.suite = params.get("Suite")
        self.version = params.get("Version")
        self.codename = params.get("Codename")
        # split() without argument yields [] (not [""]) for a missing field.
        self.architectures = params.get("Architectures", "").split()

        # A missing Label must not make construction fail; such releases are
        # rejected later by is_relevant().
        self.sortkey = (self.label or "") + (
            self.version or self._SUITE_TO_VERSION.get(self.suite, "99")
        )

    def __repr__(self):
        """Return the display name, e.g. ``Debian stable/12.5 ("bookworm")``.

        The same string is used as identity by ``__eq__`` and ``__hash__``.
        """
        parts = []
        if self.label:
            parts.append(self.label)

        if self.version and self.suite and self.suite != self.codename:
            parts.append(f"{self.suite}/{self.version}")
        elif self.version:
            parts.append(self.version)
        elif self.suite:
            parts.append(self.suite)

        if self.is_lts():
            parts.append("LTS")
        if self.codename:
            parts.append(f"(\"{self.codename}\")")

        return " ".join(parts)

    def __eq__(self, other):
        if not isinstance(other, Release):
            return NotImplemented
        return repr(self) == repr(other)

    def __lt__(self, other):
        if not isinstance(other, Release):
            return NotImplemented
        return self._natural_sortkey() < other._natural_sortkey()

    def __hash__(self):
        return hash(repr(self))

    def _natural_sortkey(self):
        """Split ``sortkey`` into text and integer chunks for comparison.

        Comparing the raw string would put "Debian10" before "Debian9".
        ``re.split`` with one capture group always alternates text, digits,
        text, ... so equal positions hold equal types in every key.
        """
        chunks = re.split(r"(\d+)", self.sortkey, flags=re.ASCII)
        return [int(c) if i % 2 else c for i, c in enumerate(chunks)]

    def release_date(self):
        """Return the date encoded in an Ubuntu ``YY.MM`` version.

        Returns:
            A ``date`` (first day of the release month), or ``None`` when the
            label is not "Ubuntu", the version is missing, or it cannot be
            parsed (a warning is logged in the last case).
        """
        if self.label != "Ubuntu" or not self.version:
            return None
        try:
            return datetime.strptime(self.version, "%y.%m").date()
        except ValueError as e:
            logger.warning("Can't parse calver %s: %s", self.version, e)
            return None

    def is_lts(self):
        """Return True for releases dated April of an even year."""
        released = self.release_date()
        if released is None:
            return False
        return released.year % 2 == 0 and released.month == 4

    def age(self):
        """Return ``today - release_date`` or ``None`` if the date is unknown."""
        released = self.release_date()
        if released is None:
            return None
        return date.today() - released

    def is_relevant(self):
        """Tell whether this release should be listed.

        Only Debian and Ubuntu labels qualify; helper suites (updates,
        backports, ...) are excluded, and Ubuntu releases are kept for five
        years (LTS) or one year (others) after their release date.
        """
        if self.label not in ("Debian", "Ubuntu"):
            return False

        suite = self.suite or ""
        if suite in ("oldoldstable", "devel"):
            return False

        excluded_suffixes = (
            "-updates", "-backports", "-security", "-proposed", "-sloppy",
        )
        if suite.endswith(excluded_suffixes):
            return False

        if self.label == "Ubuntu":
            age = self.age()
            if age is None:
                # Without a parsable version the support window cannot be
                # checked, so the release is left out instead of crashing.
                return False
            years = 5 if self.is_lts() else 1
            return age < years * timedelta(days=365)

        return True

    def is_experimental(self):
        """Return True for Debian experimental and not-yet-released Ubuntu."""
        if self.label == "Debian" and self.suite == "experimental":
            return True
        if self.label == "Ubuntu":
            age = self.age()
            # A negative age means the release month lies in the future.
            return age is not None and age < timedelta(days=0)

        return False


def _fetch_release(relurl):
    """Download and parse one ``Release`` file; return ``None`` on failure."""
    try:
        with urllib.request.urlopen(relurl) as u:
            logger.debug("Downloaded %s", relurl)
            return Release(u)
    except urllib.error.URLError as e:
        logger.warning("Failed to download %s: %s", relurl, e)
        return None


def get_releases(url):
    """Yield a ``Release`` for every ``dists/<name>`` directory of a mirror.

    The mirror's ``ls-lR.gz`` index is downloaded and scanned section by
    section. Each section is expected to consist of a ``./path:`` header, a
    ``total N`` line, one line per entry and a terminating blank line. For
    every ``./dists/<name>`` section listing a ``Release`` file (or a symlink
    of that name) the file is fetched and parsed.

    Args:
        url: base URL of the mirror, without trailing slash.

    Yields:
        ``Release`` objects; files that fail to download are logged and
        skipped.

    Raises:
        ValueError: if the index does not have the expected layout.
        urllib.error.URLError: if the index itself cannot be downloaded.
    """
    dirlinepattern = re.compile(
        r"\.(/dists/[\w\-]+):"
    )

    filelinepattern = re.compile(
        r"([\w\-]+)"  # permissions (1)
        r"\s+"
        r"(\d+)"  # inodes (2)
        r"\s+"
        r"(\w+)"  # user (3)
        r"\s+"
        r"(\w+)"  # group (4)
        r"\s+"
        r"(\d+)"  # size (5)
        r"\s+"
        r"(\w+\s+\d+\s+\d+:\d+|\w+\s+\d+\s+\d+)"  # datetime (6)
        r"\s+"
        r"(.*)"  # filename (7)
    )

    listurl = url + "/ls-lR.gz"
    # Both the HTTP response and the gzip wrapper are context-managed:
    # gzip.open() does not close a file object that was handed to it.
    # An explicit encoding keeps parsing independent of the locale, and
    # errors="replace" tolerates stray non-UTF-8 file names in the index.
    with urllib.request.urlopen(listurl) as response, \
            gzip.open(response, "rt", encoding="utf-8",
                      errors="replace") as f:
        logger.debug("Downloaded %s", listurl)

        while True:
            # Every section starts with a directory header and a total line.
            try:
                dirnameline = next(f).strip()
                if not dirnameline.startswith("."):
                    raise ValueError(
                        f"Unexpected directory line in {listurl}: "
                        f"{dirnameline!r}"
                    )
                totalline = next(f).strip()
                if not totalline.startswith("total "):
                    raise ValueError(
                        f"Unexpected total line in {listurl}: {totalline!r}"
                    )
            except StopIteration:
                break

            dirlinematch = dirlinepattern.fullmatch(dirnameline)
            debiandir = dirlinematch.group(1) if dirlinematch else None

            # Entries run until the blank line; sections that are not a
            # dists/<name> directory are still consumed, just not inspected.
            for rawline in f:
                fileline = rawline.strip()
                if fileline == "":
                    break
                if debiandir is None:
                    continue

                filelinematch = filelinepattern.fullmatch(fileline)
                if not filelinematch:
                    continue
                filename = filelinematch.group(7)
                if filename == "Release" or filename.startswith("Release ->"):
                    # The download finishes (and its connection is closed)
                    # before control is handed to the consumer.
                    release = _fetch_release(url + debiandir + "/Release")
                    if release is not None:
                        yield release


def write_csv(filename, releases, archs):
    """Write one CSV row per relevant release and supported architecture.

    Args:
        filename: path of the CSV file to create (overwritten if present).
        releases: iterable of release objects; entries whose
            ``is_relevant()`` is false are skipped.
        archs: architectures to emit; a row is written for each one that is
            also listed in the release's ``architectures``.

    The columns are ``OS`` (lower-cased label), ``Dist`` (lower-cased
    codename, with ``rc-buggy`` mapped to ``experimental``), ``Arch``,
    ``Name`` (``repr`` of the release) and ``Exp`` (``is_experimental()``).
    """
    # Explicit encoding so the output does not depend on the locale.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        w = DictWriter(f, fieldnames=("OS", "Dist", "Arch", "Name", "Exp", ))
        w.writeheader()

        for r in releases:
            if not r.is_relevant():
                continue

            wanted = [arch for arch in archs if arch in r.architectures]
            if not wanted:
                continue

            if not r.codename:
                # Without a codename there is nothing to put in "Dist";
                # skip the release instead of failing on None.lower().
                logger.warning("Skipping %s: no codename", r)
                continue

            dist = r.codename.lower()
            if dist == "rc-buggy":
                dist = "experimental"

            # These values are the same for every architecture of a release.
            os_name = r.label.lower()
            name = repr(r)
            experimental = r.is_experimental()

            for arch in wanted:
                w.writerow({
                    "OS": os_name,
                    "Dist": dist,
                    "Arch": arch,
                    "Name": name,
                    "Exp": experimental,
                })
                logger.debug("Wrote %s to file %s", r, filename)


if __name__ == "__main__":
    # Collect the releases of both mirrors, then write one CSV per
    # architecture family.
    logger.info("Downloading releases...")

    # Explicit checks instead of assert: assertions are removed under -O and
    # an empty result must never produce empty CSV files silently.
    debianreleases = set(get_releases("http://ftp.debian.org/debian"))
    if not debianreleases:
        raise SystemExit("No Debian releases found")
    ubuntureleases = set(get_releases("http://ftp.ubuntu.com/ubuntu"))
    if not ubuntureleases:
        raise SystemExit("No Ubuntu releases found")

    # The sets de-duplicate releases reachable through several directory
    # names; sorted() already returns a list.
    releases = sorted(debianreleases | ubuntureleases)
    logger.info("Found %d releases", len(releases))

    write_csv("debians-arm.csv", releases, ("armhf", "arm64"))
    logger.info("Wrote debians-arm.csv")

    write_csv("debians-x86.csv", releases, ("i386", "amd64"))
    logger.info("Wrote debians-x86.csv")
